Chapter 5 Community composition
5.1 Count data preparation
genome_counts_log <- genome_counts %>%
column_to_rownames(var="genome") %>%
mutate_all(~log10(.+1)) #fixed: mutate_at(vars(), ~log10(.+1))) was not working
genome_counts_pivot <- genome_counts %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
mutate(phylum = fct_relevel(phylum, rev(ehi_phylum_colors$phylum))) #sort phyla by taxonomy
genome_counts_by_host <- sample_metadata %>%
select("sample","species","area_type", "development") %>%
rename(host_sp=species) %>%
left_join(genome_counts_pivot,., by=join_by("sample" == "sample")) #%>%
#mutate(sample=factor(sample, levels = sample_sort)) #alternative to join: sorting by area_type
# Retrieve taxonomy colors to use standardised EHI colors
phylum_colors <- ehi_phylum_colors %>%
filter(phylum %in% unique(genome_counts_by_host$phylum)) %>%
select(colors) %>%
pull() %>%
rev()
phylum_colors <- c(phylum_colors,"#cccccc") #REMOVE! ONLY FOR ARCHAEANS
# Which host species each genome can be found in
genomes_by_species <- genome_counts_by_host %>%
filter(count>0) %>%
group_by(genome) %>%
mutate(host = if_else(all(host_sp == "Sciurus vulgaris"), "only red",
if_else(all(host_sp == "Sciurus carolinensis"), "only grey", "both"))) %>%
select(genome, host) %>%
distinct(genome, .keep_all = TRUE) %>%
left_join(.,genome_metadata, by='genome')
genomes_by_species$host <-factor(genomes_by_species$host, levels = c("both", "only red", "only grey"))# Which phylum the MAG belongs to
phyla <- ehi_phylum_colors %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique()
# Generate the phylum color heatmap
phylum_heatmap <- ehi_phylum_colors %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(genome,phylum) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome")
# Create baseline circular genome tree
circular_tree <- force.ultrametric(genome_tree,method="extend") %>%
ggtree(., layout = 'circular', size = 0.1, angle=45) +
xlim(-1, NA)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
Also defined by 'tidytree'
Warning in stat_tree(data = data, mapping = mapping, geom = "segment", position = position, : Ignoring unknown parameters: `angle`
Ignoring unknown parameters: `angle`
# Add phylum colors ring
circular_tree <- gheatmap(circular_tree, phylum_heatmap, offset=0.05, width=0.2, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic, name="Phylum") +
#geom_tiplab2(size=1, hjust=-0.1) +
theme(legend.position = "right", plot.margin = margin(0, 0, 0, 0), panel.margin = margin(0, 0, 0, 0))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
#Flush color scale to enable a new color scheme in the next ring
circular_tree <- circular_tree + new_scale_fill()
# Add host ring
circular_tree_h <- circular_tree +
new_scale_fill() +
scale_fill_manual(values = c("black", "#ed2939", "#92a0ad"), name="Host\nspecies") + #"#cc3333", "#999999"
geom_fruit(
data=genomes_by_species,
geom=geom_tile,
mapping = aes(y=genome, fill=host),
offset = 0.55,
width=0.2)
#Plot circular tree
circular_tree_h***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
Also defined by 'tidytree'
#Add phylum colors
vertical_tree <- gheatmap(vertical_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE, color=NA) +
scale_fill_manual(values=colors_alphabetic)Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
#Reset fill scale
vertical_tree <- vertical_tree + new_scale_fill()
#Add counts
vertical_tree <- gheatmap(vertical_tree, genome_counts_log, offset=0.5, width=3.5, color=NA, colnames=FALSE) + #, colnames_angle=90, font.size=2, colnames_position="top", colnames_offset_y = 9
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
5.2 Taxonomic composition of samples
# sample_sort <- sample_metadata %>%
# arrange(Area_type) %>%
# select(sample) %>%
# pull()
####TAXONOMIC COMPOSITION####
# Plot stacked barplot
ggplot(genome_counts_by_host, aes(x=sample,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.02)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors, name="Phylum") +
labs(y = "Relative abundance") +
guides(fill = guide_legend(ncol = 1)) +
facet_nested(~host_sp, scales="free", space="free") +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position="right",
)phylum_summary <- genome_counts_by_host %>%
group_by(sample,host_sp,phylum) %>%
summarise(relabun=sum(count))
phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun, na.rm=TRUE),sd=sd(relabun, na.rm=TRUE)) # A tibble: 13 × 3
phylum mean sd
<fct> <dbl> <dbl>
1 p__Elusimicrobiota 0.00129 0.00676
2 p__Cyanobacteriota 0.00636 0.00837
3 p__Bacillota 0.0443 0.104
4 p__Bacillota_B 0.00228 0.00354
5 p__Bacillota_C 0.00377 0.00577
6 p__Bacillota_A 0.543 0.256
7 p__Actinomycetota 0.0851 0.229
8 p__Patescibacteria 0.00000737 0.000102
9 p__Pseudomonadota 0.0168 0.0810
10 p__Campylobacterota 0.00264 0.0102
11 p__Bacteroidota 0.284 0.196
12 p__Verrucomicrobiota 0.0105 0.0203
13 p__Thermoplasmatota 0.000200 0.00210
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)], name="Phylum") +
geom_jitter(alpha=0.5) +
facet_nested(~host_sp, scales="free", space="free") +
theme_minimal() +
theme(legend.position="right") +
labs(y="Phylum",x="Relative abundance")family_summary <- genome_counts_by_host %>%
group_by(sample,host_sp,family) %>%
summarise(relabun=sum(count))
family_summary %>%
group_by(family) %>%
summarise(mean=mean(relabun, na.rm=TRUE),sd=sd(relabun, na.rm=TRUE)) %>%
arrange(-mean)# A tibble: 74 × 3
family mean sd
<chr> <dbl> <dbl>
1 f__Lachnospiraceae 0.365 0.202
2 f__Bacteroidaceae 0.190 0.168
3 f__Oscillospiraceae 0.0923 0.0934
4 f__Bifidobacteriaceae 0.0798 0.228
5 f__Muribaculaceae 0.0776 0.0964
6 f__Ruminococcaceae 0.0314 0.0387
7 f__Streptococcaceae 0.0164 0.0868
8 f__Borkfalkiaceae 0.0144 0.0200
9 f__Lactobacillaceae 0.0129 0.0405
10 f__CAG-74 0.00941 0.0128
# ℹ 64 more rows
family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=colors_alphabetic) +
geom_jitter(alpha=0.5) +
facet_grid(.~host_sp)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")genus_summary <- genome_counts_by_host %>%
group_by(sample,host_sp,genus) %>%
summarise(relabun=sum(count))
genus_summary %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=TRUE),sd=sd(relabun, na.rm=TRUE)) %>%
arrange(-mean)# A tibble: 348 × 3
genus mean sd
<chr> <dbl> <dbl>
1 g__Prevotella 0.0829 0.118
2 g__Bifidobacterium 0.0798 0.228
3 g__Acetatifactor 0.0488 0.0669
4 g__Eisenbergiella 0.0350 0.0560
5 g__ 0.0315 0.0349
6 g__Faecousia 0.0266 0.0362
7 g__Phocaeicola 0.0255 0.0437
8 g__CAG-95 0.0242 0.0369
9 g__Roseburia 0.0226 0.0535
10 g__UBA3282 0.0193 0.0267
# ℹ 338 more rows
genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(genus) %>%
pull()
genus_summary %>%
left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=colors_alphabetic) +
geom_jitter(alpha=0.5) +
facet_grid(.~host_sp)+
theme_minimal() +
labs(y="Genus", x="Relative abundance", color="Phylum")Warning in left_join(., genome_metadata %>% select(genus, phylum) %>% unique(), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 24 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.